import datetime
import hashlib
import json
import os
import re
import sys
import time
import warnings
import ipywidgets as widgets
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from IPython.display import Markdown, display
from matplotlib.ticker import FuncFormatter
from pandas.plotting import register_matplotlib_converters
from scipy.stats import norm
from ydata_profiling import ProfileReport
# Register pandas' date/time converters with matplotlib.
register_matplotlib_converters()

# Seaborn default theme with the "notebook" context; default figure size 10x6.
sns.set()
sns.set_context("notebook")
plt.rcParams["figure.figsize"] = (10, 6)

# Display every column and row of a frame, with 4 digits of float precision.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.precision = 4

warnings.simplefilter(action="ignore", category=FutureWarning)

# Tick formatters: "$1,234" and "1,234".
dollar_formatter = FuncFormatter(lambda x, pos: f"${x:,.0f}")
thousands_formatter = FuncFormatter(lambda x, pos: f"{x:,.0f}")

# core
Fill in a module description here
Import
Data
pizza_jared_raw = pd.read_csv(
"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_jared.csv"
)
pizza_barstool_raw = pd.read_csv(
"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_barstool.csv"
)
pizza_datafiniti_raw = pd.read_csv(
"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_datafiniti.csv"
)pizza_jared_raw.shape(375, 9)
pizza_jared_raw.head()| polla_qid | answer | votes | pollq_id | question | place | time | total_votes | percent | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Excellent | 0 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0000 |
| 1 | 2 | Good | 6 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.4615 |
| 2 | 2 | Average | 4 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.3077 |
| 3 | 2 | Poor | 1 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0769 |
| 4 | 2 | Never Again | 2 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.1538 |
jared['question'].value_counts()question How was Fiore's? 25 How was Prince Street Pizza? 20 How was NY Pizza Suprema? 15 How was Pizza Mercato? 10 How was Pizza Paradise? 10 How was Rocky's? 10 How was Joe's 14th? 10 How was Pomodoro? 10 How was Saluggi's? 10 How was Tappo? 10 How was Rocco's Pizza Joint? 10 How was Joe's Pizza 14th? 10 How was Little Italy Pizza? 10 How was Highline Pizza? 5 How was Waldy's? 5 How was Artichoke Basille's Pizza? 5 How was Steve's Pizza? 5 How was Pizza Italia? 5 How was Champion Pizza? 5 How was Frank's Express Pizza? 5 How was Bella Napoli? 5 How was Stella's Pizza? 5 How was Dough Boys? 5 How was Previti Pizza? 5 How was Rivoli Pizza? 5 How was Cavallo's Pizza? 5 How was 5 Boroughs Pizza? 5 How was Big Slice Pizza? 5 How was Kiss My Slice? 5 How was Gotham Pizza? 5 How was John's of Bleecker? 5 How was New York Pizza Suprema? 5 How was Girello? 5 How was Arturo's? 5 How was Nonna's LES Pizza? 5 How was Naples 45? 5 How was Ben's of SoHo 14th Street? 5 How was Mariella? 5 How Was Roio's? 5 How was Vinny Vincenz? 5 How was Joe's Pizza? 5 How was Pizza Bash? 5 How was Patsy's? 5 How was J's? 5 How was Pizza 33? 5 How was Dona Bella? 5 How was Ben's of SoHo Spring Street? 5 How was Spunto? 5 How was Otto Enoteca? 5 How was Bleecker Street Pizza? 5 How was Williamsburg Pizza? 5 How was Bravo Pizza? 5 How was Famous Original Ray's? 5 How was Maffei's Pizza? 5 How was Luna Pizza? 5 How was Il Mattone? 5 Name: count, dtype: int64
pizza_barstool_raw.shape(463, 22)
pizza_barstool_raw.head()| name | address1 | city | zip | country | latitude | longitude | price_level | provider_rating | provider_review_count | review_stats_all_average_score | review_stats_all_count | review_stats_all_total_score | review_stats_community_average_score | review_stats_community_count | review_stats_community_total_score | review_stats_critic_average_score | review_stats_critic_count | review_stats_critic_total_score | review_stats_dave_average_score | review_stats_dave_count | review_stats_dave_total_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Pugsley's Pizza | 590 E 191st St | Bronx | 10458 | US | 40.8588 | -73.8848 | 1 | 4.5 | 121 | 8.0111 | 27 | 216.3 | 7.9920 | 25 | 199.8 | 8.8 | 1 | 8.8 | 7.7 | 1 | 7.7 |
| 1 | Williamsburg Pizza | 265 Union Ave | Brooklyn | 11211 | US | 40.7081 | -73.9509 | 1 | 3.0 | 281 | 7.7741 | 27 | 209.9 | 7.7423 | 26 | 201.3 | 0.0 | 0 | 0.0 | 8.6 | 1 | 8.6 |
| 2 | 99 Cent Fresh Pizza | 473 Lexington Ave | New York | 10017 | US | 40.7537 | -73.9741 | 1 | 4.0 | 118 | 5.6667 | 9 | 51.0 | 5.7625 | 8 | 46.1 | 0.0 | 0 | 0.0 | 4.9 | 1 | 4.9 |
| 3 | Nino's 46 | 39 W 46th St | New York | 10036 | US | 40.7569 | -73.9805 | 2 | 4.0 | 1055 | 5.6000 | 2 | 11.2 | 0.0000 | 0 | 0.0 | 4.3 | 1 | 4.3 | 6.9 | 1 | 6.9 |
| 4 | La Pizza Fresca Ristorante | 31 E 20th St | New York | 10003 | US | 40.7390 | -73.9889 | 2 | 3.0 | 143 | 7.1000 | 1 | 7.1 | 0.0000 | 0 | 0.0 | 0.0 | 0 | 0.0 | 7.1 | 1 | 7.1 |
pizza_datafiniti_raw.head()| name | address | city | country | province | latitude | longitude | categories | price_range_min | price_range_max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Shotgun Dans Pizza | 4203 E Kiehl Ave | Sherwood | US | AR | 34.8323 | -92.1838 | Pizza,Restaurant,American restaurants,Pizza Pl... | 0 | 25 |
| 1 | Sauce Pizza Wine | 25 E Camelback Rd | Phoenix | US | AZ | 33.5093 | -112.0730 | Pizza,Pizza Place,Restaurants | 0 | 25 |
| 2 | Mios Pizzeria | 3703 Paxton Ave | Cincinnati | US | OH | 39.1449 | -84.4327 | Restaurant,Pizza Place,Restaurants | 0 | 25 |
| 3 | Hungry Howies Pizza | 30495 John R Rd | Madison Heights | US | MI | 42.5167 | -83.1066 | Pizza,Carry-out food,Pizza Place,Restaurants | 25 | 40 |
| 4 | Spartan Pizzeria | 3600 Eastern Ave | Baltimore | US | MD | 39.2866 | -76.5670 | Pizza,American restaurants,Pizza Place,Pizza e... | 0 | 25 |
pizza_datafiniti_raw.shape  # output: (10000, 10)

# Profiling configuration shared by all three reports (previously repeated inline).
# NOTE(review): this is a machine-specific absolute path. If the file is the
# stock minimal config shipped with ydata-profiling, `ProfileReport(df, minimal=True)`
# would make the notebook portable — confirm before switching.
PROFILE_CONFIG_FILE = "/Users/jonathan/Downloads/config_minimal.yaml"

pizza_jared_profile = ProfileReport(pizza_jared_raw, config_file=PROFILE_CONFIG_FILE)
pizza_barstool_profile = ProfileReport(pizza_barstool_raw, config_file=PROFILE_CONFIG_FILE)
pizza_datafiniti_profile = ProfileReport(pizza_datafiniti_raw, config_file=PROFILE_CONFIG_FILE)

pizza_jared_profile
# output: <class 'ydata_profiling.profile_report.ProfileReport'>.__repr__ returned empty string

pizza_barstool_profile
# output: <class 'ydata_profiling.profile_report.ProfileReport'>.__repr__ returned empty string

pizza_datafiniti_profile
# output: <class 'ydata_profiling.profile_report.ProfileReport'>.__repr__ returned empty string
Functions
def update_answer(df, row_label=349, answer='Never Again'):
    """Overwrite the ``answer`` value of one row and return the frame.

    With the defaults, the row labelled 349 gets the answer 'Never Again'
    (in the raw Jared data shown in this notebook that row carries the
    label 'Fair').

    The frame is modified in place and also returned, so the function can be
    used with ``DataFrame.pipe``.

    Args:
        df: DataFrame holding an ``answer`` column.
        row_label: Index label of the row to rewrite.
        answer: Value written into the ``answer`` column of that row.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Assigning through ``.loc`` with a label that is not in the index would
    # append a new, mostly-empty row; leave the frame untouched in that case.
    if row_label in df.index:
        df.loc[row_label, 'answer'] = answer
    return df
def remove_col_name(df):
    """Clear the name of the column index of ``df`` in place and return ``df``."""
    column_index = df.columns
    column_index.name = None
    return df
def raw_to_clean_jared(raw):
    """Reshape the raw Jared poll data to one row per poll.

    Steps:
      1. Apply ``update_answer`` to ``raw`` (this edits ``raw`` itself) and add
         a ``datetime`` column parsed from the epoch-seconds ``time`` column.
      2. Pivot ``votes`` so every distinct ``answer`` becomes its own column,
         keyed by ``polla_qid``.
      3. Join the pivot back onto the remaining per-poll columns.

    Returns:
        DataFrame with the non-pivoted columns followed by one vote-count
        column per answer.
    """
    corrected = update_answer(raw)
    timestamps = pd.to_datetime(raw['time'], unit='s')
    long_df = corrected.assign(datetime=timestamps)

    # Wide table: one row per poll, one column per answer label.
    votes_wide = long_df.pivot_table(
        index='polla_qid',
        columns='answer',
        values='votes',
        aggfunc='first',
    ).reset_index()
    votes_wide.columns.name = None

    # Per-poll attributes; the first row of each poll is kept.
    # NOTE(review): `percent` is a per-answer value, so the one retained here
    # belongs to whichever answer row came first for the poll.
    poll_info = long_df.drop(
        columns=['answer', 'votes', 'pollq_id', 'time']
    ).drop_duplicates(subset='polla_qid')

    return poll_info.merge(votes_wide, how='inner', on='polla_qid')
def raw_to_clean_jared2(raw):
    """Return a long-format copy of the Jared data with two derived columns.

    Added columns (in this order):
      * ``answer_values`` - numeric score for ``answer`` (Excellent=5 ...
        Never Again=1). Answers without an entry in the map become NaN.
      * ``datetime`` - ``time`` (epoch seconds) converted to timestamps.

    The input frame is left unchanged; previously ``answer_values`` was
    written into ``raw`` itself, which leaked the column into every later use
    of the raw frame.

    Args:
        raw: DataFrame with ``answer`` and ``time`` columns.

    Returns:
        A new DataFrame: the columns of ``raw`` plus ``answer_values`` and
        ``datetime``.
    """
    answer_map = {
        'Excellent': 5,
        'Good': 4,
        'Average': 3,
        'Poor': 2,
        'Never Again': 1,
    }
    return raw.assign(
        answer_values=raw['answer'].map(answer_map),
        datetime=pd.to_datetime(raw['time'], unit='s'),
    )
def raw_to_clean_barstool(raw):
    """Placeholder cleaner for the Barstool data: returns ``raw`` unchanged."""
    return raw
def raw_to_clean_datafiniti(raw):
    """Placeholder cleaner for the Datafiniti data: returns ``raw`` unchanged."""
    return raw
barstool = raw_to_clean_barstool(pizza_barstool_raw)
datafiniti = raw_to_clean_datafiniti(pizza_datafiniti_raw)
jared = raw_to_clean_jared(pizza_jared_raw)
jared2 = raw_to_clean_jared2(pizza_jared_raw)jared2.head()| polla_qid | answer | votes | pollq_id | question | place | time | total_votes | percent | answer_values | datetime | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Excellent | 0 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0000 | 5 | 2012-08-07 17:45:27 |
| 1 | 2 | Good | 6 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.4615 | 4 | 2012-08-07 17:45:27 |
| 2 | 2 | Average | 4 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.3077 | 3 | 2012-08-07 17:45:27 |
| 3 | 2 | Poor | 1 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0769 | 2 | 2012-08-07 17:45:27 |
| 4 | 2 | Never Again | 2 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.1538 | 1 | 2012-08-07 17:45:27 |
pizza_jared_raw.head(10)| polla_qid | answer | votes | pollq_id | question | place | time | total_votes | percent | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Excellent | 0 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0000 |
| 1 | 2 | Good | 6 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.4615 |
| 2 | 2 | Average | 4 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.3077 |
| 3 | 2 | Poor | 1 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0769 |
| 4 | 2 | Never Again | 2 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.1538 |
| 5 | 3 | Excellent | 1 | 3 | How was Maffei's Pizza? | Maffei's Pizza | 1348120800 | 7 | 0.1429 |
| 6 | 3 | Good | 1 | 3 | How was Maffei's Pizza? | Maffei's Pizza | 1348120800 | 7 | 0.1429 |
| 7 | 3 | Average | 3 | 3 | How was Maffei's Pizza? | Maffei's Pizza | 1348120800 | 7 | 0.4286 |
| 8 | 3 | Poor | 1 | 3 | How was Maffei's Pizza? | Maffei's Pizza | 1348120800 | 7 | 0.1429 |
| 9 | 3 | Never Again | 1 | 3 | How was Maffei's Pizza? | Maffei's Pizza | 1348120800 | 7 | 0.1429 |
all(pizza_jared_raw['polla_qid'] == pizza_jared_raw['pollq_id'])True
jared.head(10)[['polla_qid', 'answer', 'votes']]| polla_qid | answer | votes | |
|---|---|---|---|
| 0 | 2 | Excellent | 0 |
| 1 | 2 | Good | 6 |
| 2 | 2 | Average | 4 |
| 3 | 2 | Poor | 1 |
| 4 | 2 | Never Again | 2 |
| 5 | 3 | Excellent | 1 |
| 6 | 3 | Good | 1 |
| 7 | 3 | Average | 3 |
| 8 | 3 | Poor | 1 |
| 9 | 3 | Never Again | 1 |
jared_wide = jared.pivot(index='polla_qid', columns='answer', values='votes').reset_index()
jared_wide.columns.name = None
jared_wide.head()| polla_qid | Average | Excellent | Good | Never Again | Poor | |
|---|---|---|---|---|---|---|
| 0 | 2 | 4 | 0 | 6 | 2 | 1 |
| 1 | 3 | 3 | 1 | 1 | 1 | 1 |
| 2 | 4 | 1 | 4 | 2 | 0 | 1 |
| 3 | 5 | 0 | 1 | 1 | 0 | 3 |
| 4 | 6 | 12 | 2 | 10 | 0 | 4 |
jared[jared['question'] == "How was Rocco's Pizza Joint?"]| polla_qid | answer | votes | pollq_id | question | place | total_votes | percent | datetime | |
|---|---|---|---|---|---|---|---|---|---|
| 60 | 15 | Excellent | 1 | 15 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 4 | 0.2500 | 2013-09-17 12:37:49 |
| 61 | 15 | Good | 2 | 15 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 4 | 0.5000 | 2013-09-17 12:37:49 |
| 62 | 15 | Average | 0 | 15 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 4 | 0.0000 | 2013-09-17 12:37:49 |
| 63 | 15 | Poor | 1 | 15 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 4 | 0.2500 | 2013-09-17 12:37:49 |
| 64 | 15 | Never Again | 0 | 15 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 4 | 0.0000 | 2013-09-17 12:37:49 |
| 345 | 72 | Excellent | 6 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.2857 | 2019-03-06 12:56:33 |
| 346 | 72 | Good | 6 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.2857 | 2019-03-06 12:56:33 |
| 347 | 72 | Average | 7 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.3333 | 2019-03-06 12:56:33 |
| 348 | 72 | Poor | 1 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.0476 | 2019-03-06 12:56:33 |
| 349 | 72 | Fair | 1 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.0476 | 2019-03-06 12:56:33 |
jared.loc[349, 'answer'] = 'Never Again'
jared.iloc[345:350]| polla_qid | answer | votes | pollq_id | question | place | total_votes | percent | datetime | |
|---|---|---|---|---|---|---|---|---|---|
| 345 | 72 | Excellent | 6 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.2857 | 2019-03-06 12:56:33 |
| 346 | 72 | Good | 6 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.2857 | 2019-03-06 12:56:33 |
| 347 | 72 | Average | 7 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.3333 | 2019-03-06 12:56:33 |
| 348 | 72 | Poor | 1 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.0476 | 2019-03-06 12:56:33 |
| 349 | 72 | Never Again | 1 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.0476 | 2019-03-06 12:56:33 |
jared[jared['answer'] == 'Fair']| polla_qid | answer | votes | pollq_id | question | place | total_votes | percent | datetime | |
|---|---|---|---|---|---|---|---|---|---|
| 349 | 72 | Fair | 1 | 72 | How was Rocco's Pizza Joint? | Rocco's Pizza Joint | 21 | 0.0476 | 2019-03-06 12:56:33 |
Plots
What are the biggest discrepancies between the community and critic scores?
jared2.head()| polla_qid | answer | votes | pollq_id | question | place | time | total_votes | percent | answer_values | datetime | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Excellent | 0 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0000 | 5 | 2012-08-07 17:45:27 |
| 1 | 2 | Good | 6 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.4615 | 4 | 2012-08-07 17:45:27 |
| 2 | 2 | Average | 4 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.3077 | 3 | 2012-08-07 17:45:27 |
| 3 | 2 | Poor | 1 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.0769 | 2 | 2012-08-07 17:45:27 |
| 4 | 2 | Never Again | 2 | 2 | How was Pizza Mercato? | Pizza Mercato | 1344361527 | 13 | 0.1538 | 1 | 2012-08-07 17:45:27 |
jared2[jared2['place'] == '5 Boroughs Pizza']| polla_qid | answer | votes | pollq_id | question | place | time | total_votes | percent | answer_values | datetime | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 270 | 57 | Excellent | 1 | 57 | How was 5 Boroughs Pizza? | 5 Boroughs Pizza | 1513264990 | 3 | 0.3333 | 5 | 2017-12-14 15:23:10 |
| 271 | 57 | Good | 0 | 57 | How was 5 Boroughs Pizza? | 5 Boroughs Pizza | 1513264990 | 3 | 0.0000 | 4 | 2017-12-14 15:23:10 |
| 272 | 57 | Average | 2 | 57 | How was 5 Boroughs Pizza? | 5 Boroughs Pizza | 1513264990 | 3 | 0.6667 | 3 | 2017-12-14 15:23:10 |
| 273 | 57 | Poor | 0 | 57 | How was 5 Boroughs Pizza? | 5 Boroughs Pizza | 1513264990 | 3 | 0.0000 | 2 | 2017-12-14 15:23:10 |
| 274 | 57 | Never Again | 0 | 57 | How was 5 Boroughs Pizza? | 5 Boroughs Pizza | 1513264990 | 3 | 0.0000 | 1 | 2017-12-14 15:23:10 |
by_answers = (jared2.groupby(["place", "answer_values"])["votes"].sum().reset_index().groupby(
"answer_values"
)["votes"].sum().reset_index() )
by_answers['votes_perc'] = by_answers["votes"] / by_answers["votes"].sum()
by_answers| answer_values | votes | votes_perc | |
|---|---|---|---|
| 0 | 1 | 31 | 0.0292 |
| 1 | 2 | 104 | 0.0979 |
| 2 | 3 | 345 | 0.3249 |
| 3 | 4 | 368 | 0.3465 |
| 4 | 5 | 214 | 0.2015 |
# Bar chart of vote share per rating value.
fig, ax = plt.subplots(figsize=(10, 6))
fig.patch.set_facecolor('w')
ax.bar(by_answers['answer_values'], by_answers['votes_perc'])
fig.tight_layout()

# Vote-share-weighted mean of the rating values.
sum(by_answers["votes_perc"] * by_answers["answer_values"])  # output: 3.593220338983051
# Unfinished cell: creates an empty 10x6 figure with a white background.
fig, ax = plt.subplots(figsize=(10, 6))
fig.patch.set_facecolor('w')
# NOTE(review): the groupby result is neither assigned nor plotted, so
# nothing is drawn on `ax` yet.
jared2.groupby(['place', ])
# ax.xaxis.set_major_formatter(dollar_formatter)
fig.tight_layout()